<?php
/**
 * Created by PhpStorm.
 * User: Administrator
 * Date: 2022/4/23 0023
 * Time: 11:46
 * Youlai (youlai.cn) article scraper
 */
// Abort unless the BASEPATH constant is defined, i.e. the file was not
// reached through the framework's entry point (presumably CodeIgniter's
// front controller, given the CI_Controller base class below).
defined('BASEPATH') OR exit('No direct script access allowed');
// HTML parser library; presumably the provider of str_get_html() used below.
// NOTE(review): a path starting with "./" is resolved against the current
// working directory, not against this file's directory — confirm this is intended.
include_once('./simple_html_dom.php');

class Youlai_article extends CI_Controller
{
    /**
     * Collect the disease ids listed under every department / sub-department
     * of youlai.cn and store them in `ai_youlai_article_ids`.
     *
     * Walks three levels: first-level departments, their second-level
     * departments, and the disease links shown for each second-level
     * department. One `INSERT IGNORE` is issued per second-level department.
     *
     * The method is currently disabled by the `exit` on its first line.
     *
     * @return void
     */
    public function ids()
    {
        // Kill switch kept from the original: this endpoint is intentionally
        // disabled. Remove the exit to run the crawl again.
        exit;

        $dept_box = 'dl[class=article_l_top disSearchMargin]';

        $root = $this->_fetch_dom('https://www.youlai.cn/dise/pk_1_0_1.html');
        $dept1_holder = $this->_find_path($root, [[$dept_box, 0], ['dt', 0], ['p', 0]]);
        if ($dept1_holder === null) {
            echo "nodata";
            exit;
        }

        foreach ($dept1_holder->find('a') as $dept1_link) {
            $dept1_id = str_replace(['/dise/pk_', '_0_1.html'], '', $dept1_link->href);
            $dept1_name = trim($dept1_link->innertext);

            $dept1_dom = $this->_fetch_dom("https://www.youlai.cn/dise/pk_" . $dept1_id . "_0_1.html");
            $dept2_holder = $this->_find_path($dept1_dom, [[$dept_box, 0], ['dd', 0], ['p', 0]]);
            if ($dept2_holder === null) {
                // Page missing or laid out differently: nothing to collect here.
                continue;
            }

            foreach ($dept2_holder->find('a') as $position => $dept2_link) {
                // The first anchor is skipped, as in the original code
                // (presumably an "all sub-departments" link — TODO confirm).
                if ($position == 0) {
                    continue;
                }
                $dept2_id = str_replace(['/dise/pk_' . $dept1_id . '_', '_1.html'], '', $dept2_link->href);
                $dept2_name = trim($dept2_link->innertext);

                $url = "https://www.youlai.cn/dise/pk_" . $dept1_id . "_" . $dept2_id . "_1.html";
                $dept2_dom = $this->_fetch_dom($url);
                echo $url . "<br>";

                // Disease list for this second-level department.
                $disease_list = $this->_find_path(
                    $dept2_dom,
                    [['div[class=disSearchList cur]', 0], ['dl[class=textList]', 0]]
                );
                if ($disease_list === null) {
                    continue;
                }

                $rows = [];
                foreach ($disease_list->find('dt') as $group) {
                    foreach ($group->find('a') as $disease_link) {
                        $dis_id = str_replace(['/dise/', '.html'], '', (string) $disease_link->href);
                        // dis_id is stored as a number; skip links whose href
                        // does not reduce to a plain numeric id.
                        if (!preg_match('/^\d+$/D', $dis_id)) {
                            continue;
                        }
                        $rows[] = [
                            $dept1_id,
                            $dept2_id,
                            $dept1_name,
                            $dept2_name,
                            (int) $dis_id,
                            trim($disease_link->innertext),
                        ];
                    }
                }

                $this->_insert_ignore(
                    'ai_youlai_article_ids',
                    ['dept1_id', 'dept2_id', 'dept1_name', 'dept2_name', 'dis_id', 'dis_name'],
                    $rows
                );
            }
        }
    }

    /**
     * Pop one id from the Redis list `youlai_arc_list`, crawl every article
     * list page of the matching disease and store title/url rows in
     * `ai_youlai_article_url`.
     *
     * Prints "nodata" when the queue is empty or the id has no database row;
     * otherwise prints the popped queue id plus one.
     *
     * The method is currently disabled by the `exit` on its first line.
     *
     * @return void
     */
    public function urls()
    {
        // Kill switch kept from the original: this endpoint is intentionally disabled.
        exit;

        $redis = $this->_redis();

        // Take one id off the work queue.
        $queue_id = $redis->lpop('youlai_arc_list');
        if (!$queue_id) {
            echo "nodata";
            exit;
        }
        $ids = $this->db->where('id', $queue_id)->get('youlai_article_ids')->row_array();
        if (!$ids) {
            echo "nodata";
            exit;
        }

        $dis_id = $ids['dis_id'];
        $dis_name = str_replace('<i></i>', '', $ids['dis_name']);
        $list_prefix = "https://www.youlai.cn/dise/articlelist/" . $dis_id . "_";

        // The first list page also carries the pager, so it is fetched once
        // and reused for page 1 below.
        $first_page = $this->_fetch_dom($list_prefix . "1.html");
        $pages = $first_page === null ? 1 : $this->_count_pages($first_page);

        // Note: the number of pages is taken from the pager as-is; no upper
        // limit is enforced here.
        for ($page = 1; $page <= $pages; $page++) {
            $dom = ($page === 1) ? $first_page : $this->_fetch_dom($list_prefix . $page . ".html");
            if ($dom === null) {
                continue;
            }
            $items = $dom->find("ul[class=article_left article_l_list bd_none] li");
            if (!$items) {
                continue;
            }

            $rows = [];
            // Only the first ten <li> elements of a page are used, as in the original.
            foreach (array_slice($items, 0, 10) as $item) {
                $link = $this->_find_path($item, [['h3', 0], ['a', 0]]);
                if ($link === null) {
                    continue;
                }
                $rows[] = [
                    (int) $dis_id,
                    $ids['dept1_name'],
                    $ids['dept2_name'],
                    $dis_name,
                    trim(strip_tags($link->innertext)),
                    trim($link->href),
                ];
            }

            $this->_insert_ignore(
                'ai_youlai_article_url',
                ['did', 'dept1', 'dept2', 'dis', 'title', 'url'],
                $rows
            );
        }

        // Always report the queue id; the original reused one variable for both
        // the queue id and the disease id, so its output depended on whether
        // any row had been processed.
        echo ((int) $queue_id) + 1;
    }

    /**
     * Pop one id from the Redis list `youlai_arc_view`, fetch the article
     * detail page and store it in `ai_spider_youlai_disease_article`.
     *
     * Prints "nodata" when the queue is empty or the id has no database row.
     * When the page cannot be fetched/parsed, contains `div#cbox`, or has no
     * article body, the id is pushed to `youlai_arc_view_bak` after a 3 second
     * pause and "1" is printed. On success "1" is printed as well.
     *
     * @return void
     */
    public function info()
    {
        $redis = $this->_redis();

        // Take one id off the work queue; failures are parked on a backup list.
        $urlid = $redis->lpop('youlai_arc_view');
        if (!$urlid) {
            echo "nodata";
            exit;
        }
        $url_info = $this->db->where('id', $urlid)->get('ai_youlai_article_url')->row_array();
        if (!$url_info) {
            echo "nodata";
            exit;
        }

        $html = $this->_fetch_dom("https://www.youlai.cn" . $url_info['url']);
        $body = $this->_find_path(
            $html,
            [['div[class=v_con art_con border_top1]', 0], ['div[class=text]', 0]]
        );
        // div#cbox was already treated as "retry later" by the original code
        // (presumably a verification box — TODO confirm). An unusable page is
        // handled the same way instead of crashing and losing the queue item.
        if ($html === null || $html->find("div[id=cbox]") || $body === null) {
            sleep(3);
            $redis->rPush('youlai_arc_view_bak', $urlid);
            echo 1;
            exit;
        }

        // Author details are optional: a missing node yields an empty string.
        $doctor_link = $this->_find_path(
            $html,
            [['dl[class=doc_pic_box pdTop10 clearfix]', 0], ['dd', 0], ['a', 0]]
        );
        $title_node = $this->_find_path($html, [['ul[class=mgBottom10]', 0], ['li', 1], ['span', 0]]);

        $article_content = trim($body->innertext);
        $pubdate = trim($this->_inner_text($html->find("span[class=fl_left time]", 0)));
        $author_name = $this->_inner_text($html->find("strong", 0));
        $author_title = $this->_inner_text($title_node);
        $author_department = $this->_inner_text($doctor_link ? $doctor_link->find("p", 1) : null);
        $author_hospital = $this->_inner_text($doctor_link ? $doctor_link->find("p", 0) : null);

        // Original author's note: the click count has to be re-fetched with
        // Python because this endpoint is encrypted; 0 is stored whenever the
        // response does not contain the expected structure.
        $arc_id = str_replace(["/yyk/article/", ".html"], "", $url_info['url']);
        $data = "info=" . urlencode('[{"a_id":"' . $arc_id . '","a_type":1}]');
        $res = http_post("https://www.youlai.cn/get/tongji", $data);
        $res_info = is_string($res) ? json_decode($res, true) : null;
        $click_num = 0;
        if (isset($res_info['data']['list'][0]['a_views'])) {
            $click_num = $res_info['data']['list'][0]['a_views'];
        }

        $this->_insert_ignore(
            'ai_spider_youlai_disease_article',
            [
                'department_1', 'department_2', 'disease', 'article_name', 'article_content', 'pubdate',
                'click_num', 'author_name', 'author_title', 'author_department', 'author_hospital', 'vid',
            ],
            [[
                $url_info['dept1'],
                $url_info['dept2'],
                $url_info['dis'],
                $url_info['title'],
                $article_content,
                $pubdate,
                $click_num,
                $author_name,
                $author_title,
                $author_department,
                $author_hospital,
                (int) $urlid,
            ]]
        );
        echo 1;
    }

    /**
     * Push the ids of `ai_youlai_article_url` rows onto the Redis list
     * `youlai_arc_view` so that info() can process them.
     *
     * @param int $min_id Only rows with an id strictly greater than this value
     *                    are queued. Defaults to 85002, the bound that was
     *                    previously hard-coded.
     * @return void
     */
    public function url_push($min_id = 85002)
    {
        $redis = $this->_redis();
        $rows = $this->db
            ->query("select id from ai_youlai_article_url where id>?", [(int) $min_id])
            ->result_array();
        foreach ($rows as $row) {
            $redis->rPush('youlai_arc_view', $row['id']);
        }
    }

    /**
     * Open and authenticate the Redis connection shared by the queue methods.
     *
     * NOTE(review): host and password are hard-coded in source; consider
     * moving them to configuration.
     *
     * @return Redis
     * @throws RuntimeException When connect() or auth() reports failure.
     */
    private function _redis()
    {
        $redis = new Redis();
        if (!$redis->connect('82.157.157.146')) {
            throw new RuntimeException('Unable to connect to Redis');
        }
        if (!$redis->auth('spring')) {
            throw new RuntimeException('Redis authentication failed');
        }
        return $redis;
    }

    /**
     * Download a URL with geturl() and parse it with str_get_html().
     *
     * @param string $url
     * @return object|null The parsed DOM, or null when the download did not
     *                     yield a non-empty string or parsing failed.
     */
    private function _fetch_dom($url)
    {
        $content = geturl($url);
        if (!is_string($content) || $content === '') {
            return null;
        }
        $dom = str_get_html($content);
        return $dom ? $dom : null;
    }

    /**
     * Follow a chain of find($selector, $index) calls, stopping at the first
     * step that does not match.
     *
     * @param object|null $root  DOM or node to start from (null is allowed).
     * @param array       $steps List of [selector, index] pairs.
     * @return object|null The node reached by the last step, or null.
     */
    private function _find_path($root, array $steps)
    {
        $node = $root;
        foreach ($steps as $step) {
            if (!$node) {
                return null;
            }
            $node = $node->find($step[0], $step[1]);
        }
        return $node ? $node : null;
    }

    /**
     * Return a node's innertext, or an empty string when the node is missing.
     *
     * @param object|null $node
     * @return string
     */
    private function _inner_text($node)
    {
        return $node ? (string) $node->innertext : '';
    }

    /**
     * Read the total number of list pages from the pager of an article list page.
     *
     * The page number is taken from the anchor inside the second-to-last <li>
     * of `div.pageyl > ul`, as in the original code. Falls back to 1 whenever
     * the pager is missing, blank, too short, or not numeric.
     *
     * @param object $dom Parsed list page.
     * @return int Page count, never less than 1.
     */
    private function _count_pages($dom)
    {
        $pager = $dom->find('div[class=pageyl]', 0);
        if (!$pager || !str_replace(" ", "", $pager->innertext)) {
            return 1;
        }
        $list = $pager->find('ul', 0);
        $entries = $list ? $list->find('li') : [];
        if (count($entries) < 2) {
            return 1;
        }
        $anchor = $entries[count($entries) - 2]->find('a', 0);
        $pages = $anchor ? (int) trim(strip_tags($anchor->innertext)) : 0;
        return max(1, $pages);
    }

    /**
     * Run a multi-row `INSERT ignore` using query bindings so that every value
     * is escaped by the database driver.
     *
     * @param string $table   Table name; inserted verbatim, must be a trusted literal.
     * @param array  $columns Column names; inserted verbatim, must be trusted literals.
     * @param array  $rows    List of rows, each a list of values in column order.
     * @return void Nothing is executed when $rows is empty.
     */
    private function _insert_ignore($table, array $columns, array $rows)
    {
        if (empty($rows)) {
            return;
        }
        $row_marks = '(' . implode(',', array_fill(0, count($columns), '?')) . ')';
        $all_marks = implode(',', array_fill(0, count($rows), $row_marks));

        $bindings = [];
        foreach ($rows as $row) {
            foreach ($row as $value) {
                $bindings[] = $value;
            }
        }

        $sql = 'INSERT ignore INTO ' . $table . '(' . implode(',', $columns) . ') VALUES ' . $all_marks;
        $this->db->query($sql, $bindings);
    }
}